## Descriptive statistics for Figure 1 main paper and Table 1 SI are in section (3)
## Regression models for Figures 2 & 3 main paper and Tables 2 & 3 SI are in section (4) - along with other model specifications in Tables 4 & 5 SI.
## Difference in means tests for Tables 6 - 15 SI are in section (5).

#(1) Packages ####
#install.packages("MASS")  
library(MASS)
#install.packages("tidyverse")  
library(tidyverse)

#(2) Data ####
# 2.1 Read in data 
# setwd() as required
dforiginal <- read.csv("Replication_data.csv")

# 2.2 Convert necessary variables and create main outcome variables for analysis
dforiginal <- dforiginal %>%
  mutate(
    # 2.2.1 Convert categorical variables to numeric
    Blame_question_pub_1 = as.numeric(Blame_question_pub_1),
    Blame_question_pub_2 = as.numeric(Blame_question_pub_2),
    Blame_question_pri_1 = as.numeric(Blame_question_pri_1),
    Blame_question_pri_2 = as.numeric(Blame_question_pri_2),
    High_numeric = as.numeric(High),
    Low_numeric = as.numeric(Low),
    Control_numeric = as.numeric(Control),
    CSL_numeric = as.numeric(Cleaner.Streets.Ltd.),
    
    # 2.2.2 Create numeric response variable
    response_approval_numeric = case_when(
      response_approval == "Strongly disapprove" ~ 1,
      response_approval == "Disapprove" ~ 2,
      response_approval == "Somewhat disapprove" ~ 3,
      response_approval == "Somewhat approve" ~ 4,
      response_approval == "Approve" ~ 5,
      response_approval == "Strongly approve" ~ 6,
      TRUE ~ NA_integer_
    ),
    
    # 2.2.3 Create binary outcome variable
    response_approval_numeric_binary = case_when(
      response_approval %in% c("Strongly disapprove", "Disapprove", "Somewhat disapprove") ~ 0,
      response_approval %in% c("Somewhat approve", "Approve", "Strongly approve") ~ 1,
      TRUE ~ NA_integer_
    ),
    
    # 2.2.4 Create ordered response variable
    response_approval_ordered = factor(
      response_approval,
      levels = c("Strongly disapprove", "Disapprove", "Somewhat disapprove", 
                 "Somewhat approve", "Approve", "Strongly approve"),
      ordered = TRUE
    ),
    
    # 2.2.5 Create Statement_type and Delegation_type categorical variables
    Statement_type = case_when(
      Blame.shift.statement == 1 ~ "Shift_statement",
      Blame.claim.statement == 1 ~ "Claim_statement",
      Control.statement == 1 ~ "Control_statement",
      TRUE ~ NA_character_
    ),
    
    Delegation_type = case_when(
      High == 1 ~ "High_delegation",
      Control == 1 ~ "Control_delegation",
      Low == 1 ~ "Low_delegation",
      TRUE ~ NA_character_
    ),
    
    # 2.2.6 Convert Statement_type and Delegation_type to factors
    Statement_type = factor(
      Statement_type, 
      levels = c("Claim_statement", "Control_statement", "Shift_statement")
    ),
    Delegation_type = factor(
      Delegation_type, 
      levels = c("High_delegation", "Low_delegation", "Control_delegation")
    )
  )

# 2.3 Create Blame_agent_v_principal variable
dforiginal <- dforiginal %>%
  mutate(
    Blame_question_1 = rowSums(across(c(Blame_question_pub_1, Blame_question_pri_1)), na.rm = TRUE),
    Blame_question_2 = rowSums(across(c(Blame_question_pub_2, Blame_question_pri_2)), na.rm = TRUE),
    Blame_agent_v_principal = ifelse(Blame_question_2 > Blame_question_1, 1, 0)
  )

# 2.4 Organize covariates
dforiginal <- dforiginal %>%
  mutate(
    Gender = as.factor(Gender),
    Gender_convert = case_when(
      Gender %in% c("Non-binary / third gender", "Prefer to self-describe") ~ "Other gender identity",
      TRUE ~ as.character(Gender)
    ),
    Age = as.factor(Age),
    Ethnicity = as.factor(Ethnicity),
    Location = as.factor(Location),
    Employment = as.factor(Employment),
    Pol_party = as.factor(Pol_party),
    Pol_party_convert = case_when(
      Pol_party %in% c("Don't know", "Some other party (please write below)") ~ "None",
      TRUE ~ as.character(Pol_party)
    ),
    Education = as.factor(Education),
    Education_convert = case_when(
      Education %in% c("University Bachelors Degree", "Graduate or professional degree (MA, MS, MBA, PhD, JD, MD, DDS)") ~ "Graduate",
      Education %in% c("Some University but no degree", "Completed Secondary School", 
                       "Completed Primary School", "Vocational or Similar", "Some Secondary") ~ "Non-graduate",
      Education == "Prefer not to say" ~ "Did not answer",
      TRUE ~ as.character(Education)
    )
  )

#(3) Descriptives ####
# 3.1 Mean approval across relevant treatment combinations (Figure 1 main paper; Table 1 SI)
summary_all <- dforiginal %>%
  group_by(Statement_type) %>%
  summarize(
    Mean = mean(response_approval_numeric, na.rm = TRUE),
    SD   = sd(response_approval_numeric, na.rm = TRUE),
    N    = n(),
    SE   = SD / sqrt(N),
    CI_low  = Mean - qt(0.975, df = N - 1) * SE,
    CI_high = Mean + qt(0.975, df = N - 1) * SE
  ) %>%
  mutate(Group = "All")

summary_delegation <- dforiginal %>%
  group_by(Delegation_type, Statement_type) %>%
  summarize(
    Mean = mean(response_approval_numeric, na.rm = TRUE),
    SD   = sd(response_approval_numeric, na.rm = TRUE),
    N    = n(),
    SE   = SD / sqrt(N),
    CI_low  = Mean - qt(0.975, df = N - 1) * SE,
    CI_high = Mean + qt(0.975, df = N - 1) * SE
  ) %>%
  rename(Group = Delegation_type)

summary_pubpri <- dforiginal %>%
  group_by(Cleaner.Streets.Ltd., Statement_type) %>%
  summarize(
    Mean = mean(response_approval_numeric, na.rm = TRUE),
    SD   = sd(response_approval_numeric, na.rm = TRUE),
    N    = n(),
    SE   = SD / sqrt(N),
    CI_low  = Mean - qt(0.975, df = N - 1) * SE,
    CI_high = Mean + qt(0.975, df = N - 1) * SE
  ) %>%
  rename(Group = Cleaner.Streets.Ltd.) %>%
  mutate(Group = ifelse(Group == 1, "Private Sector", "Public Sector"))

summary_blame <- dforiginal %>%
  group_by(Blame_agent_v_principal, Statement_type) %>%
  summarize(
    Mean = mean(response_approval_numeric, na.rm = TRUE),
    SD   = sd(response_approval_numeric, na.rm = TRUE),
    N    = n(),
    SE   = SD / sqrt(N),
    CI_low  = Mean - qt(0.975, df = N - 1) * SE,
    CI_high = Mean + qt(0.975, df = N - 1) * SE
  ) %>%
  mutate(Group = ifelse(
    Blame_agent_v_principal == 1, "Blame Agent More", "Blame Principal More/Equally"))

#(4) Regression models ####
# 4.1 OLS models (Figure 2 main paper; Table 2 SI)
olsmodel1 <- lm(
  response_approval_numeric ~ Blame.shift.statement + Blame.claim.statement,
  data = dforiginal
)

olsmodel1.1 <- lm(
  response_approval_numeric ~ Blame.shift.statement + Blame.claim.statement +
    Age + Gender_convert + Ethnicity + Employment + Education_convert + Location + Pol_party_convert,
  data = dforiginal,
  subset = Gender != "Prefer not to say" & Ethnicity != "Prefer not to say" & Education != "Prefer not to say"
)

olsmodel2 <- lm(
  response_approval_numeric ~ Blame.shift.statement + Blame.claim.statement +
    Blame_agent_v_principal * Blame.shift.statement,
  data = dforiginal
)

olsmodel2.1 <- lm(
  response_approval_numeric ~ Blame.shift.statement + Blame.claim.statement +
    Blame_agent_v_principal * Blame.shift.statement +
    Age + Gender_convert + Ethnicity + Employment + Education_convert + Location + Pol_party_convert,
  data = dforiginal,
  subset = Gender != "Prefer not to say" & Ethnicity != "Prefer not to say" & Education != "Prefer not to say"
)

olsmodel3 <- lm(
  response_approval_numeric ~ Blame.shift.statement + Blame.claim.statement +
    High + Low + Cleaner.Streets.Ltd. +
    Blame.shift.statement * High +
    Blame.shift.statement * Low +
    Blame.shift.statement * Cleaner.Streets.Ltd.,
  data = dforiginal
)

olsmodel3.1 <- lm(
  response_approval_numeric ~ Blame.shift.statement + Blame.claim.statement +
    High + Low + Cleaner.Streets.Ltd. +
    Blame.shift.statement * High +
    Blame.shift.statement * Low +
    Blame.shift.statement * Cleaner.Streets.Ltd. +
    Age + Gender_convert + Ethnicity + Employment + Education_convert + Location + Pol_party_convert,
  data = dforiginal,
  subset = Gender != "Prefer not to say" & Ethnicity != "Prefer not to say" & Education != "Prefer not to say"
)

# 4.2 Logistic regression models (Figure 3 main paper; Table 3 SI)
logitmodel1 <- glm(
  Blame_agent_v_principal ~ High + Low + Cleaner.Streets.Ltd.,
  family = binomial(link = "logit"),
  data = dforiginal
)
exp(coef(logitmodel1))

logitmodel1.1 <- glm(
  Blame_agent_v_principal ~ High + Low + Cleaner.Streets.Ltd. +
    Age + Gender_convert + Ethnicity + Employment + Education_convert + Location + Pol_party_convert,
  family = binomial(link = "logit"),
  data = dforiginal,
  subset = Gender != "Prefer not to say" & Ethnicity != "Prefer not to say" & Education != "Prefer not to say"
)
exp(coef(logitmodel1.1))

# 4.3 Logistic regression models (Table 4 SI)
logitS1 <- glm(
  response_approval_numeric_binary ~ Blame.shift.statement + Blame.claim.statement,
  family = binomial(link = "logit"),
  data = dforiginal
)
exp(coef(logitS1))

logitS1.1 <- glm(
  response_approval_numeric_binary ~ Blame.shift.statement + Blame.claim.statement +
    Age + Gender_convert + Ethnicity + Employment + Education_convert + Location + Pol_party_convert,
  family = binomial(link = "logit"),
  data = dforiginal,
  subset = Gender != "Prefer not to say" & Ethnicity != "Prefer not to say" & Education != "Prefer not to say"
)
exp(coef(logitS1.1))

logitS2 <- glm(
  response_approval_numeric_binary ~ Blame.shift.statement + Blame.claim.statement +
    Blame_agent_v_principal * Blame.shift.statement,
  family = binomial(link = "logit"),
  data = dforiginal
)
exp(coef(logitS2))

logitS2.1 <- glm(
  response_approval_numeric_binary ~ Blame.shift.statement + Blame.claim.statement +
    Blame_agent_v_principal * Blame.shift.statement +
    Age + Gender_convert + Ethnicity + Employment + Education_convert + Location + Pol_party_convert,
  family = binomial(link = "logit"),
  data = dforiginal,
  subset = Gender != "Prefer not to say" & Ethnicity != "Prefer not to say" & Education != "Prefer not to say"
)
exp(coef(logitS2.1))

logitS3 <- glm(
  response_approval_numeric_binary ~ Blame.shift.statement + Blame.claim.statement +
    High + Low + Cleaner.Streets.Ltd. +
    Blame.shift.statement * High +
    Blame.shift.statement * Low +
    Blame.shift.statement * Cleaner.Streets.Ltd.,
  family = binomial(link = "logit"),
  data = dforiginal
)
exp(coef(logitS3))

logitS3.1 <- glm(
  response_approval_numeric_binary ~ Blame.shift.statement + Blame.claim.statement +
    High + Low + Cleaner.Streets.Ltd. +
    Blame.shift.statement * High +
    Blame.shift.statement * Low +
    Blame.shift.statement * Cleaner.Streets.Ltd. +
    Age + Gender_convert + Ethnicity + Employment + Education_convert + Location + Pol_party_convert,
  family = binomial(link = "logit"),
  data = dforiginal,
  subset = Gender != "Prefer not to say" & Ethnicity != "Prefer not to say" & Education != "Prefer not to say"
)
exp(coef(logitS3.1))

# 4.4 Ordinal logistic regression models (Table 5 SI)
# Note: polr() does not support subset argument, using filter() instead
polr1 <- dforiginal %>%
  polr(
    response_approval_ordered ~ Blame.shift.statement + Blame.claim.statement,
    data = .,
    Hess = TRUE
  )

polr1.1 <- dforiginal %>%
  filter(
    Gender != "Prefer not to say",
    Ethnicity != "Prefer not to say",
    Education != "Prefer not to say"
  ) %>%
  droplevels() %>%
  polr(
    response_approval_ordered ~ Blame.shift.statement + Blame.claim.statement +
      Age + Ethnicity + Education_convert + Gender_convert + Employment + Location + Pol_party_convert,
    data = .,
    Hess = TRUE
  )

polr2 <- dforiginal %>%
  polr(
    response_approval_ordered ~ Blame.shift.statement + Blame.claim.statement +
      Blame_agent_v_principal * Blame.shift.statement,
    data = .,
    Hess = TRUE
  )

polr2.1 <- dforiginal %>%
  filter(
    Gender != "Prefer not to say",
    Ethnicity != "Prefer not to say",
    Education != "Prefer not to say"
  ) %>%
  droplevels() %>%
  polr(
    response_approval_ordered ~ Blame.shift.statement + Blame.claim.statement +
      Blame_agent_v_principal * Blame.shift.statement +
      Age + Ethnicity + Education_convert + Gender_convert + Employment + Location + Pol_party_convert,
    data = .,
    Hess = TRUE
  )

polr3 <- dforiginal %>%
  polr(
    response_approval_ordered ~ Blame.shift.statement + Blame.claim.statement +
      High + Low + Cleaner.Streets.Ltd. +
      Blame.shift.statement * High +
      Blame.shift.statement * Low +
      Blame.shift.statement * Cleaner.Streets.Ltd.,
    data = .,
    Hess = TRUE
  )

polr3.1 <- dforiginal %>%
  filter(
    Gender != "Prefer not to say",
    Ethnicity != "Prefer not to say",
    Education != "Prefer not to say"
  ) %>%
  droplevels() %>%
  polr(
    response_approval_ordered ~ Blame.shift.statement + Blame.claim.statement +
      High + Low + Cleaner.Streets.Ltd. +
      Blame.shift.statement * High +
      Blame.shift.statement * Low +
      Blame.shift.statement * Cleaner.Streets.Ltd. +
      Age + Ethnicity + Education_convert + Gender_convert + Employment + Location + Pol_party_convert,
    data = .,
    Hess = TRUE
  )

#(5) Difference in means tests ####
# 5.1 Table 6 SI
t_test_0 <- t.test(
  response_approval_numeric ~ Blame.shift.statement,
  data = dforiginal,
  subset = Statement_type %in% c("Control_statement", "Shift_statement")
)

# 5.2 Table 7 SI
t_test_1 <- t.test(
  response_approval_numeric ~ Blame.claim.statement,
  data = dforiginal,
  subset = Statement_type %in% c("Control_statement", "Claim_statement")
)

# 5.3 Table 8 SI
t_test_2 <- t.test(
  response_approval_numeric ~ Blame.shift.statement,
  data = dforiginal,
  subset = Statement_type %in% c("Claim_statement", "Shift_statement")
)

# 5.4 Table 9 SI
anova_1 <- anova(
  lm(response_approval_numeric ~ Statement_type, 
     data = dforiginal))

# 5.5 Table 10 SI
t_test_3 <- t.test(
  response_approval_numeric ~ Blame_agent_v_principal,
  data = dforiginal,
  subset = Statement_type %in% c("Shift_statement")
)

# 5.6 Table 11 SI
t_test_4 <- t.test(
  response_approval_numeric ~ High,
  data = dforiginal,
  subset = (Statement_type %in% c("Shift_statement") & Delegation_type %in% c("High_delegation", "Control_delegation"))
)

# 5.7 Table 12 SI
t_test_5 <- t.test(
  response_approval_numeric ~ Low,
  data = dforiginal,
  subset = (Statement_type %in% c("Shift_statement") & Delegation_type %in% c("Low_delegation", "Control_delegation"))
)

# 5.8 Table 13 SI
t_test_6 <- t.test(
  response_approval_numeric ~ High,
  data = dforiginal,
  subset = (Statement_type %in% c("Shift_statement") & Delegation_type %in% c("High_delegation", "Low_delegation"))
)

# 5.9 Table 14 SI
anova_2 <- anova(
  lm(response_approval_numeric ~ Delegation_type, 
     data = dforiginal, 
     subset = Statement_type %in% c("Shift_statement")))

# 5.10 Table 15 SI
t_test_7 <- t.test(
  response_approval_numeric ~ Cleaner.Streets.Ltd.,
  data = dforiginal,
  subset = Statement_type %in% c("Shift_statement")
)